import pandas as pd

# Summarise the genre data: for every genre, compute its total count and its
# weight (that genre's share of the grand total across all genres), then save
# the result as a CSV file.

# Human-readable labels applied to the summary rows, in positional order.
# NOTE(review): assumes the CSV has exactly these 25 genre columns, in this
# order, starting at the third column -- confirm against movie_genre.csv.
# A different column count raises on assignment; a different order would
# silently mislabel rows.
GENRE_LABELS = [
    "Action",
    "Adventure",
    "Animation",
    "Biography",
    "Comedy",
    "Crime",
    "Documentary",
    "Drama",
    "Family",
    "Fantasy",
    "Film-Noir",
    "History",
    "Horror",
    "Music",
    "Musical",
    "Mystery",
    "Reality-TV",
    "Romance",
    "Sci-Fi",
    "Short",
    "Sport",
    "Talk-Show",
    "Thriller",
    "War",
    "Western",
]

movie_genre = pd.read_csv(r"DSProject\csv\movie_genre.csv")

# Skip the first two columns; everything after them is summed per column.
genre_summary = movie_genre.iloc[:, 2:].copy()

# One row per summed column, with the column totals stored under "count".
genre_sums = genre_summary.sum().to_frame(name="count")

# Weight = this genre's count divided by the sum of all genre counts.
overall_count = genre_sums["count"].sum()
genre_sums["weight"] = genre_sums["count"] / overall_count

# Replace the original column names with the labels above and name the index
# "genres" so that it becomes the header of the first column in the output.
new_index = pd.Index(GENRE_LABELS, name="genres")
genre_sums.index = new_index

genre_sums.to_csv(r"DSProject\csv\genres_summary.csv")
